% scribe: Moorea Brega
% lastupdate: Oct. 2, 2005
% lecture: 5
% title: Weak Law of Large Numbers
% references: Durrett, sections 1.4 and 1.5
% keywords: i.i.d., independent identically distributed, product spaces, independence, Kolmogorov's extension theorem, construction of independent random variables, WLLN, weak law of large numbers, truncation
% end

\documentclass[12pt, letterpaper]{article}
\include{macros}
\newtheorem{remark}[theorem]{Remark}

\begin{document}

\lecture{5}{Weak Law of Large Numbers}{Moorea Brega}{brega@stat.berkeley.edu}

References: Durrett [Sections 1.4, 1.5]

\section{Independence}
% keywords: i.i.d., independent identically distributed, product spaces, independence, Kolmogorov's extension theorem, construction of independent random variables
% end

Denote by $(\Omega,\F,\P)$ a probability space. Consider a sequence of random variables $X_1, X_2, \ldots$ with distributions on $(\R,\borel)$ given by
\[
\P(X_i \le x) = F_i(x),
\]
where $F_i$ is the cumulative distribution function of $X_i$. If $F_i(x) \equiv F(x)$, we say the $X_i$ are \textit{identically distributed}. If
\begin{equation}\label{eqn:indep}
\P \left( \bigcap_{i=1}^n \{X_i \le x_i\} \right) = \prod_{i=1}^n F_i(x_i)
\end{equation}
\noindent for all choices of $x_i \in \R$, we say $X_1, X_2, \ldots, X_n$ are \textit{independent}.

\begin{remark}
For any choice of cumulative distribution functions $F_i$ there exists a probability space $(\Omega, \F, \P)$ carrying independent random variables $X_i$ with these distributions.
\end{remark}

The remark follows from our discussion of product spaces. Consider first only two random variables, $X_1$ and $X_2$. As seen in Lecture 4, there is a product measure on $\Omega = \R \times \R$ under which the projection maps $X_i$, defined by $X_i(\omega) = x_i$ where $\omega = (x_1,x_2)$, are independent with the prescribed distributions. This idea extends easily to finite $n$, with $\Omega = \R \times \R \times \cdots \times \R = \R^n$ and projection maps $X_i(\omega) = x_i$ for $i = 1, 2, \ldots, n$. The following is a simple example of the finite $n$ case in which the random variables have densities.

\begin{example}
Consider
\[
F_i(x) = \int_{-\infty}^x f_i(y)\, dy,
\]
where $f_i$ is the density of $X_i$. Then the joint law of $(X_1,\ldots, X_n)$ on $\R^n$ has density
\[
h(x) = \prod_{i=1}^n f_i(x_i)
\]
with respect to Lebesgue measure $dx_1\, dx_2 \cdots dx_n$.
\end{example}

For an infinite sequence of random variables $X_1, X_2, \ldots$, we must consider the infinite product space $\Omega = \R \times \R \times \cdots = \R^{\infty}$, equipped with the projections $X_i(\omega) = x_i$ where $\omega = (x_1, x_2, \ldots)$.

\begin{remark}
It is a nontrivial fact of measure theory that there exists a unique probability measure $\P$ on $(\Omega,\F)$, where $\F$ is the product $\sigma$-field generated by the $X_i$, so that for every $n$, $X_1, X_2, \ldots, X_n$ are independent as in equation~(\ref{eqn:indep}). The proof uses Kolmogorov's extension theorem.
\end{remark}

\subsection{Construction of Independent Random Variables}

As a small digression, we consider a method of constructing independent random variables $X_i$ on $([0,1], \mathrm{Leb})$. As discussed previously (Lecture 2), we can create a random variable $X_1$ with distribution $F_1$ by using the inverse of the distribution function, $X_1 = F_1^{-1}(U_1)$, where $U_1$ is a $\mathrm{uniform}[0,1]$ random variable. To generate $n$ independent random variables $X_i$ with distributions $F_i$ by this method, we need to start with $n$ independent uniform random variables.
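The inverse-distribution construction is also how independent random variables are typically simulated in practice. The sketch below (a minimal illustration in Python; the choice of the exponential distribution, the function names, and the use of a library pseudo-random generator as the source of uniforms are assumptions made here for concreteness, not part of the construction above) draws $X_i = F_i^{-1}(U_i)$ from independent uniforms; how to produce such a supply of independent uniforms from a single one is the subject of the next paragraph.

\begin{verbatim}
import math
import random

def exp_inverse_cdf(u, rate=1.0):
    """Quantile function of Exponential(rate): inverse of F(x) = 1 - exp(-rate*x)."""
    return -math.log(1.0 - u) / rate

def independent_samples(inverse_cdfs, rng=None):
    """Return one draw X_i = F_i^{-1}(U_i) for each inverse c.d.f. supplied.

    The U_i are independent uniform(0,1) draws, so the X_i are independent
    with the prescribed distributions F_i.
    """
    rng = rng or random.Random()
    return [finv(rng.random()) for finv in inverse_cdfs]

# Three independent Exponential(1) random variables:
print(independent_samples([exp_inverse_cdf] * 3))
\end{verbatim}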
The following is a useful method for generating any number of independent uniforms from a single $\mathrm{uniform}[0,1]$ random variable. First, we consider the simple case of generating two i.i.d.\ uniform random variables from a single uniform $U$. We begin with the binary expansion of $U$,
\[
U = \frac{D_1}{2}+\frac{D_2}{2^2}+\frac{D_3}{2^3}+\cdots,
\]
\noindent where $D_i$ is the $i^{\mathrm{th}}$ digit in the binary expansion. Under the uniform distribution on $[0,1]$, the digits $D_1, D_2, \ldots$ are independent, each taking the values $0$ and $1$ with probability $1/2$. Thus, if we let
\begin{align*}
U_1 &= \frac{D_1}{2}+\frac{D_3}{2^2}+\frac{D_5}{2^3}+\cdots \\
U_2 &= \frac{D_2}{2}+\frac{D_4}{2^2}+\frac{D_6}{2^3}+\cdots,
\end{align*}
the random variables $U_1$ and $U_2$ are uniform$[0,1]$ and independent (a consequence of the fact that functions of disjoint collections of independent random variables are independent).

This method can be used to generate a finite or an infinite sequence of independent uniform random variables. For an infinite sequence, we write
\[
\N = \bigcup_{i=1}^{\infty} N_i,
\]
\noindent where the $N_i$ are disjoint and $|N_i| = \infty$. The construction above is repeated, with $U_i$ defined using the digits $D_j$ for $j \in N_i$.

\section{Weak Law of Large Numbers}
% keywords: WLLN, weak law of large numbers, truncation
% end

The Weak Law of Large Numbers is a statement about sums of independent random variables. Before we state the WLLN, it is necessary to define convergence in probability. We say $Y_n$ \textit{converges in probability} to $Y$, and write $Y_n \pcv Y$, if $\forall\ \epsilon > 0$,
$$\P(\omega: |Y_n(\omega) - Y(\omega)| > \epsilon) \rightarrow 0, \qquad n \rightarrow \infty.$$

\begin{theorem}[Weak Law of Large Numbers]
Let $X, X_1, X_2, \ldots$ be a sequence of i.i.d.\ random variables with $E|X| < \infty$ and define $S_n = X_1 + X_2 + \cdots + X_n$. Then
$$\frac{S_n}{n} \pcv EX.$$
\end{theorem}

\begin{proof}
In this proof, we employ the common strategy of first proving the result under an $L^2$ condition (i.e.\ assuming that the second moment is finite), and then using truncation to get rid of the extraneous moment condition.

First, we assume $EX^2 < \infty$. Because the $X_i$ are i.i.d.,
\[
\mathrm{Var}\left(\frac{S_n}{n}\right) = \frac{1}{n^2} \sum_{i=1}^n \mathrm{Var}(X_i) = \frac{\mathrm{Var}(X)}{n}.
\]
By Chebyshev's inequality, $\forall\ \epsilon > 0$,
$$\P\left(\left|\frac{S_n}{n} - EX\right| > \epsilon \right) \leq \frac{1}{\epsilon^2}\mathrm{Var} \left(\frac{S_n}{n}\right) = \frac{\mathrm{Var}(X)}{n\epsilon^2} \rightarrow 0.$$
Thus, $\frac{S_n}{n} \pcv EX$ under the finite second moment condition.

To pass from $L^2$ to $L^1$, we use truncation. For $0 < x < \infty$ let
\begin{align*}
X_{xk} &= X_k \1_{(|X_k| \le x)} \\
Y_{xk} &= X_k \1_{(|X_k| > x)}.
\end{align*}
Then $X_k = X_{xk}+Y_{xk}$ and
\begin{align*}
\frac{S_n}{n} &= \frac{1}{n}\sum_{k=1}^{n}X_{xk} + \frac{1}{n}\sum_{k=1}^{n}Y_{xk}\\
&= U_{xn} + V_{xn}.
\end{align*}
By the triangle inequality,
\[
E \left| \frac{1}{n} \sum_{k=1}^n Y_{xk} \right| \le \frac{1}{n} \sum_{k=1}^n E|Y_{xk}| = E(|X| \1_{(|X| > x)}),
\]
and by the dominated convergence theorem (DCT),
$$E(|X|\1_{(|X| > x)}) \rightarrow 0, \qquad x \rightarrow \infty.$$
Fix $0 < \epsilon < 1$ and choose $x$ such that
\[
E\left( |X|\1_{(|X|>x)} \right) = E|Y_{x1}| < \epsilon^2.
\]
Let $\mu_x = E(X_{x1})$ and $\mu = E(X)$. Since $\mu - \mu_x = E(Y_{x1})$, we also have
\[
|\mu_x - \mu| = |E(Y_{x1})| \le E|Y_{x1}| < \epsilon^2 < \epsilon.
\]
Let $B_n = \{|U_{xn} - \mu_x| > \epsilon\}$ and $C_n = \{|V_{xn}| > \epsilon\}$.
Noting that $E(X_{xk}^2) \le x^2 < \infty$, we can apply the $L^2$ case proved above to the truncated averages $U_{xn}$. Thus, we choose $N > 0$ such that $\forall\ n > N$,
$$\P(B_n) = \P(|U_{xn} - \mu_x| > \epsilon) < \epsilon.$$
Now, by Markov's inequality, we also have
\[
\P(C_n) = \P(|V_{xn}| > \epsilon) \le \frac{E|V_{xn}|}{\epsilon} \le \frac{E|Y_{x1}|}{\epsilon} < \epsilon.
\]
On the event $B_n^c \cap C_n^c = (B_n \cup C_n)^c$, we have $|U_{xn} - \mu_x| \le \epsilon$ and $|V_{xn}| \le \epsilon$, and therefore
\[
\left| \frac{S_n}{n} - \mu \right| \le |U_{xn} - \mu_x | + |V_{xn}| + |\mu_x - \mu | \le 2\epsilon + \epsilon^2 \le 3\epsilon.
\]
Thus, $\forall\ n > N$,
\[
\P \left(\left|\frac{S_n}{n} - EX\right| > 3\epsilon \right) \le \P(B_n \cup C_n) \le \P(B_n) + \P(C_n) < 2\epsilon.
\]
Since $0 < \epsilon < 1$ was arbitrary, this shows $\frac{S_n}{n} \pcv EX$.
\end{proof}
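As a quick numerical illustration (a minimal simulation sketch, not part of the proof; the Pareto distribution and the exponent $\alpha = 1.5$ are choices made here for concreteness), the Python code below averages i.i.d.\ samples with $E|X| < \infty$ but infinite variance, so the theorem applies even though the simple $L^2$ argument does not.

\begin{verbatim}
import random

def running_average(n, alpha=1.5, seed=0):
    """Return S_n / n for n i.i.d. Pareto(alpha) samples (scale x_m = 1).

    For alpha = 1.5 the mean is alpha / (alpha - 1) = 3, but the variance
    is infinite, so only the hypothesis E|X| < infinity of the WLLN holds.
    """
    rng = random.Random(seed)
    total = sum(rng.paretovariate(alpha) for _ in range(n))
    return total / n

for n in (10**3, 10**5, 10**7):
    # The averages should settle near EX = 3; convergence is slow because
    # of the heavy tail, which is exactly what the truncation step handles.
    print(n, running_average(n))
\end{verbatim}

\end{document}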